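# re.search  returns the first match as a Match object (None when nothing matches)
# re.findall returns all non-overlapping matches as a list (of the captured groups when the pattern has groups)
# re.S lets "." match newline characters as well; it is the short alias of re.DOTALL
# (.*?) captures any run of characters: with "?" the match is non-greedy (as short as possible), without it greedy (as long as possible)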

import re

# Sample document searched by every demo below.
# NOTE(review): the second, stray </title> looks deliberate -- it is what makes
# the greedy re.S / re.DOTALL searches further down capture more than
# "Title1". Confirm before tidying the markup.
html = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title1</title>
    </title>
</head>
<body>
    <ul>
        <li><a href="https://www.baidu.com/news/1">链接1</a></li>
        <li><a href="https://www.baidu.com/news/2">链接2</a></li>
        <li><a href="https://www.baidu.com/news/3">链接3</a></li>
        <li><a href="https://www.baidu.com/news/4">链接4</a></li>
    </ul>
</body>
</html>"""

# Disabled re.search demos, kept for reference. In order they show:
#   1. non-greedy, no flag        -> the capture stops at the first </title>
#   2. greedy, no flag            -> "." cannot cross a newline, so the match
#                                    still ends on the first line
#   3./4. greedy with re.S/DOTALL -> "." crosses the newline and the capture
#                                    runs up to the last </title>
# result = re.search('<title>(.*?)</title>', html)
# print(result.group(1))
#
# result = re.search('<title>(.*)</title>', html)
# print(result.group(1))
#
# result = re.search('<title>(.*)</title>', html, re.S)
# print(result.group(1))
#
# result = re.search('<title>(.*)</title>', html, re.DOTALL)
# print(result.group(1))


# Regex fragment for the URL prefix shared by the patterns below. The dots are
# escaped so they match a literal "." only; left unescaped, "www.baidu.com"
# would also match text such as "wwwXbaiduXcom".
NEWS_URL_PREFIX = r"https://www\.baidu\.com/news/"

# One fixed URL, one group: findall returns a list of the captured link texts.
items = re.findall(rf'<li><a href="{NEWS_URL_PREFIX}1">(.*?)</a></li>', html)
print(items)

# Two groups (news id, link text): findall returns a list of 2-tuples.
items = re.findall(rf'<li><a href="{NEWS_URL_PREFIX}(.*?)">(.*?)</a></li>', html)
print(items)

# The id part is matched but not captured, so only the link texts come back.
items = re.findall(rf'<li><a href="{NEWS_URL_PREFIX}.*?">(.*?)</a></li>', html)
print(items)

# Capture the whole href and the link text of every list entry.
items = re.findall(r'<li><a href="(.*?)">(.*?)</a></li>', html)
for href, text in items:
    print(f"链接:{href}, 名字:{text}")